# -*- coding: utf-8 -*-

import json
import re
from collections import OrderedDict

# NOTE(review): the original carried a bare "example006.json" comment here --
# presumably an alternative input/output file name; confirm before relying on it.

# Matches runs of characters in the range U+4E00..U+9FA5 (common CJK ideographs).
# Compiled once at module level instead of once per input record.
_CHINESE_RUN = re.compile(r'[\u4e00-\u9fa5]+')


def build_entity_type_qa(dic):
    """Build de-duplicated "what is the entity type" question/answer pairs.

    For every ``key, value`` item of *dic*:

    * the question text is built from ``value["描述"]``;
    * the answer is every character of *key* that falls in U+4E00..U+9FA5,
      concatenated in order (any other characters in the key are dropped);
    * an entry is produced only when *value* has at least one field other
      than ``"描述"``.

    Identical (ask, answer) pairs are emitted once, in first-seen order, so
    the result is deterministic across runs.

    Args:
        dic: Mapping of record key to a mapping of fields. Each record must
            contain a ``"描述"`` field.

    Returns:
        A ``(pairs, labels)`` tuple: *pairs* is a list of
        ``{"ask": ..., "answer": ...}`` dicts and *labels* is the set of all
        answers that were produced.

    Raises:
        KeyError: If a record has no ``"描述"`` field.
    """
    pairs = []
    labels = set()
    seen = set()  # (ask, answer) tuples already emitted

    for key, value in dic.items():
        text = value["描述"]
        # Keep only the Chinese characters of the record key as the answer.
        chinese_str = ''.join(_CHINESE_RUN.findall(key))

        # A record consisting solely of the description yields nothing.
        if not any(field != "描述" for field in value):
            continue

        labels.add(chinese_str)
        ask = "根据文本内容:“" + str(text) + "”回答实体类型是什么"
        fingerprint = (ask, chinese_str)
        if fingerprint in seen:
            continue
        seen.add(fingerprint)
        pairs.append({"ask": ask, "answer": chinese_str})

    return pairs, labels


def main():
    """Read ``test.json``, print the answer labels and append the pairs to ``example008.json``."""
    # Close the input file as soon as it has been parsed.
    with open(r'test.json', "r", encoding='gbk') as f:
        dic = json.load(f)

    tep_dics, sett = build_entity_type_qa(dic)

    print(sett)
    # "a+" appends: every run adds one more JSON array line to the output file.
    with open(r'example008.json', "a+", encoding="gbk") as f:
        f.write(json.dumps(tep_dics, ensure_ascii=False) + "\n")


if __name__ == "__main__":
    main()



